\documentclass[a4paper,12pt,twocolumn]{article}
\usepackage{graphicx}
\usepackage{listings}
\usepackage[justification=centering]{caption}
\usepackage{subfigure}  
\usepackage{multirow}
\usepackage{balance}
\usepackage{gensymb}
\lstset{language=Matlab}
\lstset{breaklines}
\lstset{extendedchars=false}

\usepackage{amsmath,amsfonts,amsthm,amssymb}
\theoremstyle{definition}
\newtheorem{thm}{Theorem}
\newtheorem{exmp}{Example}
\newtheorem{defn}{Definition}
\newtheorem{lema}{Lemma}
\newtheorem{prop}{Proposition}
\newtheorem{coro}{Corollary}

\newcommand{\GreekAlpha}[1]{\uppercase\expandafter{\romannumeral#1}}
\newcommand{\Greekalpha}[1]{\lowercase\expandafter{\romannumeral#1}}
\renewcommand{\baselinestretch}{1.25}

%------------setlength----------------%
\setlength{\textwidth}{162mm}
%\setlength{\textheight}{190mm}
\setlength{\textheight}{231mm}
\setlength{\headheight}{-0.1cm}
\setlength{\topmargin}{-0.1cm}
\setlength{\oddsidemargin}{-0cm}
\setlength{\evensidemargin}{-0cm}

\setlength\columnsep{12pt}
\setlength\columnseprule{0.4pt}

\setlength{\parskip}{1mm}
\setlength{\unitlength}{1mm}
\setlength{\parindent}{2em}

\title{Numerical Analysis Homework \#4}

\author{Li Zhiqi\quad 3180103041 , }

\begin{document}
\maketitle
\section{Theoretical questions}
\GreekAlpha{1}.\\
With the fact that
$$477 = 2^8 + 2^7 + 2^6 + 2^4 + 2^3 + 2^2 + 2^0,$$
we have
\begin{equation}
  \begin{aligned}
  &(477)_{10} = (111011101)_2 \\
  &=
(1+\frac{1}{2}+\frac{1}{2^2}+\frac{1}{2^4}+\frac{1}{2^5}+\frac{1}{2^6}+\frac{1}{2^8})
    \times 2^8.
    \end{aligned}
\end{equation}
\GreekAlpha{2}.\\
By Algorithm 2.8 , we have
\begin{equation}
  \begin{aligned}
    \frac{3}{5} &= (0.100110011\cdots)_2\\
    &=(1.00110011\cdots)_2\times 2^{-1}\\
    &=[1 + \sum_{i = 1}^{\infty}(\frac{1}{2^{4i-1}}+\frac{1}{2^{4i}})]
    \times 2^{-1}.
    \end{aligned}
\end{equation}
\GreekAlpha{3}.\\
For $x = \beta^e$, by definition 2.22, we have
\begin{eqnarray}
  x_R = (1 + \beta^{1-p})\times \beta^e,\\
  x_L = (\beta - \beta^{1-p})\times \beta^{e-1}.
\end{eqnarray}
Where (4) holds depending on $e > L$. Hence
\begin{eqnarray}
  x_R - x = \beta^{e-p+1},\\
  x - x_L = \beta^{e-p}.
\end{eqnarray}
Then we complete the proof of $x_R - x = \beta (x - x_L)$.\\
\GreekAlpha{4}.\\
By \GreekAlpha{2}, we have
$$\frac{3}{5} = (1.00110011\cdots)_2\times 2^{-1}.$$
Under the IEEE 754 single-precision protocol, we have
\begin{eqnarray}
  x_L = (1.00110011001100110011001)_2\times 2^{-1},\\
  x_R = (1.00110011001100110011010)_2\times 2^{-1}.
\end{eqnarray}
Hence
\begin{eqnarray}
  x - x_L = (0.00\cdots00010011\cdots)_2\times 2^{-1},\\
  x_R - x = (0.00\cdots00001100\cdots)_2\times 2^{-1}.
\end{eqnarray}
Then $fl(x) = x_R$, and
\begin{eqnarray}
  \begin{aligned}
  E_{ref}(x) &= \frac{x_R - x}{x} \\
             &=\frac{(2^{-25}+2^{-26}+2^{-29}+2^{30}+\cdots)\times 2^{-1}}{3/5} \\
             &= \frac{2^{-23}}{3}.
           \end{aligned}
\end{eqnarray}
\GreekAlpha{5}.\\
By the definition of unit roundoff,
\begin{eqnarray}
  \begin{aligned}
    \epsilon_u &= \max_{x\in
       \mathcal{R}(\mathcal{F})} \min \frac{|\text{drop}(x) - x|}{\beta^{e^x
       }}\\
     &=\frac{(\frac{\beta - 1}{\beta^p}+\frac{\beta -
         1}{\beta^{p+1}}+\cdots)\times \beta^{e^x}}{\beta^{e^x}}\\
     &=\frac{\beta^{1-p}}{\beta - 1}.
  \end{aligned}
\end{eqnarray}
Under the IEEE 754 single-precision protocol, $\beta = 2, p = 24$,
hence
\begin{equation}
  \epsilon_u = 2^{-23}.
\end{equation}
\GreekAlpha{6}.\\
Notice
\begin{equation}
  1 - \frac{\cos{\frac{1}{4}}}{1} \approx 0.031,
\end{equation}
and
\begin{equation}
  2^{-6} < 0.031 < 2^{-5}.
\end{equation}
By Theorem 2.49, the subtraction $1 - \cos{\frac{1}{4}}$ will lose
bits of precision at most 6 and at least 5. \\
\GreekAlpha{7}.\\
(\Greekalpha{1}) By Taylar expansion,
\begin{equation}
  1 - \cos{x} = \sum_{n = 1}^{\infty}(-1)^{n+1}\frac{x^{2n}}{(2n)!}
\end{equation}
(\Greekalpha{2})
\begin{equation}
  1 - \cos{x} = 2\sin^2{\frac{x}{2}}.
\end{equation}
(\Greekalpha{3})
\begin{equation}
  1 - \cos{x} = \frac{\sin^2{x}}{1+\cos{x}}.
\end{equation}
\GreekAlpha{8}.\\
\begin{itemize}
\item $f(x) = (x - 1)^{\alpha},f'(x) = \alpha(x - 1)^{\alpha-1}.$
  Hence
  \begin{equation}
    C_f(x) = |\frac{xf'(x)}{f(x)}| = |\frac{\alpha x}{x-1}|.
  \end{equation}
  When $x \to 1, C_f(x) \to \infty$.
\item $f(x) = \ln{x},f'(x) = \frac{1}{x}.$
  Hence
  \begin{equation}
    C_f(x) = |\frac{xf'(x)}{f(x)}| = |\frac{1}{\ln{x}}|.
  \end{equation}
  When $x \to 1, C_f(x) \to \infty$.
\item $f(x) = e^x,f'(x) = e^x.$
  Hence
  \begin{equation}
    C_f(x) = |\frac{xf'(x)}{f(x)}| = |x|.
  \end{equation}
  When $x \to \infty, C_f(x) \to \infty$.
\item $f(x) = \arccos{x},f'(x) = -\frac{1}{\sqrt{1-x^2}}.$
  Hence
  \begin{equation}
    C_f(x) = |\frac{xf'(x)}{f(x)}| = |\frac{x}{\sqrt{1-x^2}\arccos{x}}|.
  \end{equation}
  When $x \to \pm 1, C_f(x) \to \infty$.
\end{itemize}
\GreekAlpha{9}.\\
\begin{itemize}
\item $f(x) = 1 - e^{-x},f'(x) = e^{-x}.$
  Hence
  \begin{equation}
    \text{cond}_f(x) = |\frac{xf'(x)}{f(x)}| = \frac{x}{e^x-1} \le 1, \forall x
    \in [0,1]
  \end{equation}
  Last inequality comes from the fact that
  $$x \le e^x - 1$$
  and
  $$\lim_{x \to 0} \frac{x}{e^x-1} = 1.$$
\item Resulting from $x \in \mathbb{F}$, we have
  \begin{eqnarray}
    \begin{aligned}
      f_A(x) &= \text{fl}(1-\text{fl}(e^{-x}))\\
      &=(1 - e^{-x}(1+\delta_1))(1+\delta_2),
    \end{aligned}
  \end{eqnarray}
  where $|\delta_1|,|\delta_2| < \epsilon_u$. Neglecting the quadratic
  terms of $O(\delta_i^2)$, the above equation is equivalent to
  \begin{equation}
    f_A(x) = (1 - e^{-x})(1 + \delta_2 - \delta_1\frac{1}{e^x-1}),
  \end{equation}
  hence $\phi(x) = 1 + \frac{1}{e^x-1}$. By Theorem 2.76,
  \begin{equation}
    \text{cond}_A(x) \le \frac{e^x-1}{x}(1 + \frac{1}{e^x-1}) = \frac{e^x}{x}.
  \end{equation}
\item The ploting results are as following.\\
  \begin{figure}[!htp]   
	\centering
	\includegraphics[width=8cm]{Picture/cond.eps}
	\caption{$cond_f(x)$ and $cond_A(x)$}
      \end{figure}\\
  We find that $\text{cond}_f(x)$ performs well on $[0,1]$ and
  $\text{cond}_A(x) \to \infty$ as $x \to 0$. The ill-condition is
  caused by algorithm. Exactly, it's caused by catastrophic
  cancellation when
  $$\lim_{x \to 0} e^{-x} = 1.$$
\end{itemize}
\GreekAlpha{10}.\\
By definition 2.68, we have
\begin{eqnarray}
  \label{haha}
    cond_{f}(\mathbf{a}) = \max_{i = 0,1,\cdots,n-1}\frac{|a_i\frac{\partial f}{\partial a_i}(\mathbf{a})| }{|r|}.
\end{eqnarray}
With $r = f(\mathbf{a})$, $q(r) = 0$, we have
\begin{eqnarray}
  \begin{aligned}
    &q(f(\mathbf{a})) = 0  \\
    &\Rightarrow \frac{\partial q}{\partial a_i}(r) + q'(r)\cdot\frac{\partial
      f}{\partial a_i}(\mathbf{a}) = 0 \\
    &\Rightarrow r^i + q'(r) \frac{\partial f}{\partial
      a_i}(\mathbf{a}) = 0\\
    &\Rightarrow \frac{\partial f}{\partial
      a_i}(\mathbf{a}) = -\frac{r^i}{q'(r)},
  \end{aligned}
\end{eqnarray}
where the second step do differential with respect of $a_i$ at both
sides. Then \eqref{haha} becomes
\begin{eqnarray}
  \begin{aligned}
    cond_{f}(\mathbf{a}) &= \max_{i =
      0,1,\cdots,n-1}\frac{|a_i\frac{\partial f}{\partial
        a_i}(\mathbf{a})| }{|r|}\\
    &= \frac{1}{|q'(r)|}\max_{i =0,1,\cdots,n-1}|a_ir^{i-1}|.
  \end{aligned}
\end{eqnarray}
For Wilkinson example, we have $q(x) = \prod_{k=1}^{p}(x-k) $, $r =
p$, hence
\begin{eqnarray}
  \begin{aligned}
    cond_{f} &= \frac{1}{|q'(r)|}\max_{i
      =0,1,\cdots,p-1}|a_ir^{i-1}|\\
    &= \frac{1}{(p-1)!}\max_{i
      =0,1,\cdots,p-1}|a_ir^{i-1}|\\
    &\ge \frac{1}{(p-1)!}|a_{p-1}p^{p-2}|\\
    &=\frac{1}{(p-1)!}\cdot\frac{p(p+1)}{2p^2}p^p\\
    &=\frac{(p+1)p^p}{2\cdotp!},
  \end{aligned}
\end{eqnarray}
which has the similar form as the one in Example 2.64. The comparison
tell us that the problem of root finding for polynomials with very
high degrees is hopeless.\\
\GreekAlpha{11}.\\
Consider the case $p = 4$. We take $a = 4.995\times10^1, b =
9.999\times10^1$, then
\begin{eqnarray}
  \begin{aligned}
    \frac{a}{b} &= 0.49954995\\
    &\to 0.4995500\\
    &\to 4.995500\times10^{-1}\\
    &\to 4.996\times10^{-1},
  \end{aligned}
\end{eqnarray}
where the three $\to$ follow from (\Greekalpha{2}),(\Greekalpha{3})
and (\Greekalpha{5}) in Definition 2.38 respectively.

However, $a/b$ is supposed to be $4.995\times10^{-1}$ by definition of
roundoff. It shows that the presision of the register is at least
$2p+1$.
For other p, the example is similar.\\
\GreekAlpha{12}.\\
Assume the distance between $x_i$ and $x_{i-1}$ is much smaller than
others, here $i > 2$ and $i < N - 1$. Then we have
$$\mu_i = \frac{x_i - x_{i-1}}{x_{i+1} - x_{i-1}} \approx 0, \lambda_i
= \frac{x_{i+1} - x_{i}}{x_{i+1} - x_{i-1}} \approx 1,$$
and 
$$\mu_{i-1} \approx 1, \lambda_{i-1} \approx 0.$$
Consider the complete case, notice the coefficient matrix
$$\begin{bmatrix}
  2&  \mu_2&  &  &  &  &  & \\
  \lambda_3&  2&  \mu_3&  &  &  &  & \\
  &  &  \ddots &  &  &  &  & \\
  &  &  \lambda_{i-1}&  2&  \mu_{i-1}&  &  & \\
  &  &  &  \lambda_{i}&  2& \mu_{i} &  & \\
  &  &  &  &  &  \ddots&  & \\
  &  &  &  &  &  & \lambda_{N-1} &2
\end{bmatrix}$$.
However, the bad distance condition does not lead to a bad condition
number. After a series of numerical tests, I hold the view that a
small distance condition will not cause inaccuracy.\\
\GreekAlpha{13}(addition A).\\
By IEEE standard 754,
\begin{eqnarray}
  \begin{aligned}
    \left [ 128,129 \right ] &= [1\times 2^7,(1+\frac{1}{2^7})\times2^7] \\
    &= [(1)_2\times2^7,(1.0000001)_2\times2^7]
  \end{aligned}
\end{eqnarray}
with p = 24. Hence the smallest interval we can get in bisection
method has length
$$2^{-23}\times 2^7 = 2^{-16}.$$
When the root is lying at the midpoint of above interval, we compute
this root with absolute error
$$\frac{1}{2}\times2^{-16} = 2^{-17} \approx 7.6\times10^{-6} >
10^{-6},$$
which shows we can not compute the root with absolute accuracy $<
10^{-6}$.
\GreekAlpha{14}(addition B).\\
By definition 2.59, it's easy to get
$$\text{cond}_f(x) = \frac{x}{\sin{x}}, \ x\in(0,\frac{\pi}{2}).$$
Resulting from the assumptions on $\sin{x}$ and $\cos{x}$, we have
  \begin{eqnarray}
    \begin{aligned}
      f_A(x)
      & = \text{fl}[\frac{\text{fl}(\sin{x})}{\text{fl}(1+\text{fl}(\cos{x}))}]\\
      & = \frac{\sin{x}(1+\delta_1)}{(1+\cos{x}(1+\delta_2))(1+\delta_3)}(1+\delta_4),
    \end{aligned}
  \end{eqnarray}
  where $|\delta_i| < \epsilon_u, i = 1,2,3,4$. Neglecting the quadratic
  terms of $O(\delta_i^2)$, the above equation is equivalent to
  \begin{equation}
    f_A(x) = (\frac{\sin{x}}{1+\cos{x}})(1 + \delta_1 +\delta_4 -\delta_3 - \delta_2\frac{\cos{x}}{1+\cos{x}}),
  \end{equation}
  hence $\phi(x) = 3 + \frac{\cos{x}}{1+\cos{x}}$. By Theorem 2.76,
  \begin{equation}
    \text{cond}_A(x) \le \frac{\sin{x}}{x}(3 +
    \frac{\cos{x}}{1+\cos{x}}) .
  \end{equation}
  With the fact that $\sin{x} < x, \cos{x} < 1 + \cos{x}$, we prove
  that $\text{cond}_A(x) < 4, x\in(0,\frac{\pi}{2})$. It succeeds to avoid catastrophic
  cancellation when $x \to 0$.
\newpage
\section{programming}
A. Run code/problemA.m by matlab to get following result.\\
\begin{figure}[!htp]   
	\centering
	\includegraphics[width=8cm]{Picture/A.eps}
	\caption{three functions near 1.0}
\end{figure}\\
Here red, blue and black line represent the function in
(2.49a),(2.49b) and (2.49c) respectively. \\
We find that (2.49a) and
(2.49b) oscillate sharply, while (2.49c) performs smoothly. (2.49c) is
the most accurate. That's because (2.49a) and (2.49b) do lots of
floating-point operations and the propagation of rounding errors makes
the result being garbarge.\\\\
B. Run code/problemB.m by matlab to get following result.\\
\begin{itemize}
\item
    \begin{align*}
      \text{UFL}(\mathbb{F}) &= \beta^L = 0.5,\\
      \text{OFL}(\mathbb{F}) &= \beta^U(\beta - \beta^{1-p}) = 3.5.
    \end{align*}
\item  All numbers in $\mathbb{F}$ :
    \begin{align*}
      &-3.5,-3,-2.5,-2,-1.75,\\
      &-1.5,-1.25,-1,-0.875,-0.75,\\
      &-0.625,-0.5,0,0.5,0.625,\\
      &0.75,0.875,1,1.25,1.5,\\
      &1.75,2,2.5,3,3.5.
    \end{align*}
    The number of numbers in $\mathbb{F}$ is 25, while
    $$\#\mathcal{F} = 2^p(U-L+1)+1 = 25.$$

  \item  All subnormal numbers of $\mathbb{F}$ :
    \begin{align*}
   -0.375,-0.25,-0.125,0,0.125,0.25,0.375
    \end{align*}
\item We show $\mathbb{F}$ and extended $\mathbb{F}$ on real axis
  here:\\
  \begin{figure}[!htp]   
	\centering
	\includegraphics[width=8cm]{Picture/B.eps}
	\caption{$\mathbb{F}$ and extended $\mathbb{F}$ on real axis}
      \end{figure}\\
  Where solid point '$\cdot$' denotes the number of $\mathbb{F}$ or
  extended $\mathbb{F}$, and hollow point '$\circ $' denotes integer on real axis.     
\end{itemize}


\end{document}
